home *** CD-ROM | disk | FTP | other *** search
Text File | 1996-07-02 | 3.6 KB | 121 lines | [TEXT/CWIE] |
- // ===========================================================================
- // DMultiStringLocator.h
- // ---------------------
- // ©1996 Eric Gundrum, All rights reserved.
- //
- // The contents of this file may be freely altered and freely distributed
- // in any form, provided this copyright statement is retained unaltered.
- // Add your own changes below.
- // ---------------------
- //
- // Based on source code provided in Practical Algorithms for Programmers,
- // by Binstock and Rex, published in 1995 by Addison Wesley.
- //
- // Simultaneously search text for multiple strings using the Aho/Corasick
- // algorithm.
- //
-
- #pragma once
-
- // ANSI C++ headers
- // #include <stdexcept>
-
- // Mac Headers
- #include <Types.h> // for Boolean
-
- // PowerPlant Headers
- #include <LList.h>
- #include <LString.h>
-
-
- #pragma mark --- DSearchString declarations ---
- // ===========================================================================
- // ----------- DSearchString declarations ----------
- // ===========================================================================
- // An abstract class containing the string and identifying the function
- // called when this string is found.
-
- class DSearchString : public LStr255 // abstract base: one search string plus the hook called when it is found
- {
- public:
- virtual ~DSearchString() {;} // virtual, so a subclass can be destroyed through a DSearchString pointer
- DSearchString() {;} // adds no state of its own; the LStr255 base is default-constructed
-
- virtual Boolean ReportFound( long /*inOffset*/ ) = 0;
- // Called when this string is found. inOffset indicates the location in the buffer
- // of the last character of the recognized string. Pure virtual: every concrete subclass must implement it.
- // NOTE(review): the meaning of the Boolean result is not visible in this header -- confirm against the caller.
- private:
- // stop defaults: the copy constructor is declared private so outside code cannot copy-construct these objects
- DSearchString( const DSearchString &inOriginal );
- // NOTE(review): copy assignment is not declared here, so it is still compiler-generated -- confirm that is intended.
- };
-
-
- #pragma mark --- DMultiStringLocator declarations ---
- // ===========================================================================
- // ----------- DMultiStringLocator declarations ----------
- // ===========================================================================
- class DMultiStringLocator // searches a buffer for several strings at once (Aho/Corasick state machine)
- {
- public:
- virtual ~DMultiStringLocator();
- DMultiStringLocator
- ( LList &inStringList // list of search strings (presumably DSearchString items -- confirm)
- , int inAlphabetSize = 256 // size of BranchTable (defaults to 256)
- );
- // NOTE(review): presumably inSize is the length of inBufferP; the meaning of the int result is not visible here -- confirm in the implementation.
- virtual int SearchBuffer ( Uchar *inBufferP, int inSize );
- // Empty tag classes, all derived from error; presumably thrown by the implementation -- confirm.
- class error {};
- class memerror : public error {};
- class state_table_exceeded : public error {};
- class input_item_NULL : public error {};
- class list_item_NULL : public error {};
- // NOTE(review): searchStatus_stop is presumably a stop signal used while searching -- confirm where it is produced and tested.
- enum { searchStatus_stop = -1 };
-
- protected:
- // stop defaults: copy and default construction are declared protected, so outside code cannot use them
- DMultiStringLocator( const DMultiStringLocator &inOriginal );
- DMultiStringLocator();
- // NOTE(review): copy assignment is not declared, so it stays compiler-generated even though the class holds raw pointers -- confirm that is intended.
- void AddString ( DSearchString &inString );
- typedef int stateT; // type used for state numbers
- void AddStateTrans
- ( int matchChar
- , stateT currentState
- , stateT nextState
- );
- void RetryArrayInit();
- void FindRetryState
- ( int inChar, stateT currentState, stateT nextState );
-
- void QueueAdd( int *queue, int gbeg, stateT inState );
-
- stateT mMaxState; // max space for states
- stateT mHighState; // track the next free state
- int mAlphabetSize; // size of BranchTable
-
- int *MatchArray; // First level of matching: holds a match character or one of the flags below
- #define BRANCH -1 // match flag: use branch table
- #define EMPTY_SLOT -2 // match flag: unused slot
- // NOTE: these #defines are preprocessor macros, so they are not scoped to this class; BRANCH and FAIL_STATE both expand to -1.
- #define FAIL_STATE -1 // for GotoState and BranchTable
- #define state_begin 0 // the starting state
-
- stateT *RetryArray; // jump to new search word (state) when search fails
-
- LList *OutArray; // result objects indexed by end state
-
- union GotoTable // destination of MatchArray values
- {
- stateT GotoState; // destination if MatchArray is char
- stateT *BranchTable; // else goto this BRANCH branch table
- } *GotoArray; // presumably one GotoTable entry per state -- confirm
- };
-
-
- // ===========================================================================
-
-